<?php

/**
 * Regex-based scraper for film articles rendered as Wikipedia HTML.
 *
 * The patterns look for Russian-language labels ("Жанр", "Режиссёр", ...),
 * so only pages using those labels yield infobox data.
 *
 * NOTE(review): all patterns run without the "u" modifier, i.e. the Cyrillic
 * literals are compared byte-for-byte. This presumably relies on both this
 * file and the fetched page being UTF-8 encoded - confirm.
 */
class WikipediaParser {

    /**
     * Download a page and extract the movie information found in it.
     *
     * Every key of the returned array is optional and is only present when
     * the corresponding pattern matched:
     *  - language      two-letter value of the <html lang="..."> attribute
     *  - title         heading text with the " (фильм)" suffix removed
     *  - subject       HTML between the "Сюжет" heading and the next <h2>,
     *                  stripped of every tag except <p>
     *  - image         poster URL
     *  - genres, directors, producers, screenwriters, actors, companies,
     *    countries     lists of strings (one entry per <br>-separated line)
     *  - duration      raw inner HTML of the infobox cell
     *  - year          infobox cell content with tags stripped
     *
     * @param string $url Anything file_get_contents() can read
     *
     * @return array Empty when $url is empty or the page could not be read
     */
    public function parseMovie($url) {
        $result = array();

        // file_get_contents('') throws a ValueError on PHP 8; treat "no URL"
        // the same way as an unreadable page instead.
        if ($url === null || $url === false || $url === '') {
            return $result;
        }

        // Get URL to string
        $page = file_get_contents($url);
        if ($page === false || $page === '') {
            return $result;
        }

        $matches = array();

        // Get language
        if (preg_match('/<html.*?lang="([a-z]{2})"/', $page, $matches)) {
            $result['language'] = $matches[1];
        }

        // Get title
        if (preg_match('/<h1.*?><span.*?>(.+?)<\/span>/', $page, $matches)) {
            $result['title'] = str_replace(' (фильм)', '', $matches[1]);
        }

        // Get subject
        if (preg_match('/Сюжет<\/span>.*?<\/h2>([\s\S]+?)<h2>/', $page, $matches)) {
            $result['subject'] = strip_tags($matches[1], '<p>');
        }

        // Get image
        if (preg_match('/img alt="Постер фильма" src="(.+?)"/', $page, $matches)) {
            $src = $matches[1];
            // Only protocol-relative URLs ("//host/path") lack a scheme;
            // anything else is returned untouched instead of being mangled
            // into "http:https://...".
            $result['image'] = (strpos($src, '//') === 0) ? 'http:' . $src : $src;
        }

        // Infobox rows holding <br>-separated lists. The values are regex
        // fragments matching the row label, listed in output order.
        $listFields = array(
            'genres'        => 'Жанр',
            'directors'     => 'Режиссёр',
            'producers'     => 'Продюсер',
            'screenwriters' => 'Автор<br \/>\sсценария',
            'actors'        => 'В&#160;главных<br \/>\sролях',
            'companies'     => 'Кинокомпания',
        );
        foreach ($listFields as $key => $labelPattern) {
            $cell = $this->findInfoboxCell($page, $labelPattern, true);
            if ($cell !== null) {
                $result[$key] = $this->splitList($cell);
            }
        }

        // Get duration (kept as the raw cell HTML)
        $cell = $this->findInfoboxCell($page, 'Длительность', false);
        if ($cell !== null) {
            $result['duration'] = $cell;
        }

        // Get countries; the non-breaking space entities found in this cell
        // are removed before splitting.
        $cell = $this->findInfoboxCell($page, 'Страна', true);
        if ($cell !== null) {
            $result['countries'] = $this->splitList(str_replace('&#160;', '', $cell));
        }

        // Get year
        $cell = $this->findInfoboxCell($page, 'Год', false);
        if ($cell !== null) {
            $result['year'] = strip_tags($cell);
        }

        return $result;
    }

    /**
     * Return the inner HTML of the first <p> of the infobox cell that follows
     * the header matching the given label.
     *
     * @param string  $page         Page HTML
     * @param string  $labelPattern Regex fragment matching the header label
     * @param boolean $multiline    Whether the value may span several lines
     *
     * @return string|null Null when the row is not found
     */
    private function findInfoboxCell($page, $labelPattern, $multiline) {
        // "." does not match newlines here, so multi-line cells need [\s\S].
        $capture = $multiline ? '([\s\S]+?)' : '(.+?)';
        $pattern = '/' . $labelPattern . '.*?<\/th>\s<td.*?>\s<p>' . $capture . '<\/p>/';

        $matches = array();
        if (preg_match($pattern, $page, $matches)) {
            return $matches[1];
        }
        return null;
    }

    /**
     * Turn the HTML of a list cell into a list of plain-text entries.
     *
     * All tags except line breaks are stripped, the text is split on any
     * spelling of the break tag (<br>, <br/>, <br />), and each entry is
     * trimmed. Empty entries are dropped.
     *
     * @param string $html Cell HTML
     *
     * @return array List of non-empty strings
     */
    private function splitList($html) {
        $parts = preg_split('/<br\s*\/?>\s*/i', strip_tags($html, '<br>'));
        if ($parts === false) {
            return array();
        }

        $items = array();
        foreach ($parts as $part) {
            $part = trim($part);
            if ($part !== '') {
                $items[] = $part;
            }
        }
        return $items;
    }

}